## Top words/bigrams for each frame
# Nora Webb Williams
# 2.13.2019

library(readr)
library(tidyr)
library(dplyr)
library(stm)
library(tm)
library(stringr)
#library(quanteda)
#library(readtext)
library(xtable)

# read in the data
# Load the per-tweet table (one row per tweet, including the frame columns used
# below). Column types are pinned explicitly so parsing does not depend on
# readr's type guessing.
master <- read_csv(
  "results-data/tweets_with_topic_probs.csv",
  col_types = cols(
    # Integer-valued columns.
    X1 = col_integer(),
    num_followers = col_integer(),
    initial_retweets = col_integer(),
    initial_favorites = col_integer(),
    delayed_retweets = col_integer(),
    gun_control = col_integer(),
    # NOTE(review): read as double while the other engagement counts are
    # integer -- confirm this is intentional.
    delayed_favorites = col_double(),
    # An empty format string defers to the locale's default date format.
    simple_date = col_date(format = ""),
    # Columns spelled out as text (same type as the default below).
    images_list = col_character(),
    video_id = col_character(),
    name = col_character(),
    tweet_text = col_character(),
    # is_valid = col_character(),
    `Gun Rights` = col_character(),
    Incredulity = col_character(),
    meta_youth_leadership = col_character(),
    `Pride Brunch` = col_character(),
    # Every column not named above is read as character.
    .default = col_character()
  )
)



### Subset for each frame (school security, political action, mental health,
# gun free zones, gun control, enforcement failure, background checks, assault weapons)

#########################
## School security
# Tweets with a non-missing `School Security` value, ordered by tweet_dayfloor.
# NOTE(review): tweet_dayfloor is not given an explicit type when `master` is
# read, so it falls under the character default -- confirm that a text sort
# gives the intended order.
school_security <- arrange(
  filter(master, !is.na(`School Security`)),
  tweet_dayfloor
)

# Run the tweet text through stm's textProcessor (defaults plus 'amp' as an
# extra stopword), carrying the tweet rows along as metadata, then through
# prepDocuments with its default settings.
school_sec_processed <- textProcessor(
  school_security$tweet_text,
  metadata = school_security,
  customstopwords = c('amp')
)
school_sec_out <- prepDocuments(
  school_sec_processed$documents,
  school_sec_processed$vocab,
  school_sec_processed$meta
)

# Document-term counts as a dense tibble with one column per vocabulary term.
school_sec_dtm <- convertCorpus(
  documents = school_sec_out$documents,
  vocab = school_sec_out$vocab,
  type = "Matrix"
) %>%
  as.matrix() %>%
  as_tibble()

# Sum each term's column, reshape to (word, count) rows, and keep the ten
# largest counts.
school_sec_summary <- slice(
  arrange(
    gather(summarise_all(school_sec_dtm, sum), key = "word", value = "count"),
    desc(count)
  ),
  1:10
)

#print(xtable(school_sec_summary[1:10,], digits = 0), include.rownames = F)

# One-row table for this frame: its label and the top terms, comma-separated.
school_sec_row <- tibble(
  Frame = "School Security",
  top_words = str_c(school_sec_summary[["word"]], collapse = ', ')
)


################
## Political Action
# Tweets with a non-missing `Political Action` value, ordered by tweet_dayfloor.
polit_action <- arrange(
  filter(master, !is.na(`Political Action`)),
  tweet_dayfloor
)

# Run the tweet text through stm's textProcessor (defaults plus 'amp' as an
# extra stopword), carrying the tweet rows along as metadata, then through
# prepDocuments with its default settings.
polit_action_processed <- textProcessor(
  polit_action$tweet_text,
  metadata = polit_action,
  customstopwords = c('amp')
)
polit_action_out <- prepDocuments(
  polit_action_processed$documents,
  polit_action_processed$vocab,
  polit_action_processed$meta
)

# Document-term counts as a dense tibble with one column per vocabulary term.
polit_action_dtm <- convertCorpus(
  documents = polit_action_out$documents,
  vocab = polit_action_out$vocab,
  type = "Matrix"
) %>%
  as.matrix() %>%
  as_tibble()

# Sum each term's column, reshape to (word, count) rows, and keep the ten
# largest counts.
polit_action_summary <- slice(
  arrange(
    gather(summarise_all(polit_action_dtm, sum), key = "word", value = "count"),
    desc(count)
  ),
  1:10
)

#print(xtable(polit_action_summary[1:10,], digits = 0), include.rownames = F)

# One-row table for this frame: its label and the top terms, comma-separated.
polit_action_row <- tibble(
  Frame = "Political Action",
  top_words = str_c(polit_action_summary[["word"]], collapse = ', ')
)


################
## Mental Health
# Tweets with a non-missing `Mental Health` value, ordered by tweet_dayfloor.
mental_health <- arrange(
  filter(master, !is.na(`Mental Health`)),
  tweet_dayfloor
)

# Run the tweet text through stm's textProcessor (defaults plus 'amp' as an
# extra stopword), carrying the tweet rows along as metadata, then through
# prepDocuments with its default settings.
mental_health_processed <- textProcessor(
  mental_health$tweet_text,
  metadata = mental_health,
  customstopwords = c('amp')
)
mental_health_out <- prepDocuments(
  mental_health_processed$documents,
  mental_health_processed$vocab,
  mental_health_processed$meta
)

# Document-term counts as a dense tibble with one column per vocabulary term.
mental_health_dtm <- convertCorpus(
  documents = mental_health_out$documents,
  vocab = mental_health_out$vocab,
  type = "Matrix"
) %>%
  as.matrix() %>%
  as_tibble()

# Sum each term's column, reshape to (word, count) rows, and keep the ten
# largest counts.
mental_health_summary <- slice(
  arrange(
    gather(summarise_all(mental_health_dtm, sum), key = "word", value = "count"),
    desc(count)
  ),
  1:10
)

#print(xtable(mental_health_summary[1:10,], digits = 0), include.rownames = F)

# One-row table for this frame: its label and the top terms, comma-separated.
mental_health_row <- tibble(
  Frame = "Mental Health",
  top_words = str_c(mental_health_summary[["word"]], collapse = ', ')
)


################
## Gun free zones
# Tweets with a non-missing `Gun Free Zones` value, ordered by tweet_dayfloor.
gun_free <- arrange(
  filter(master, !is.na(`Gun Free Zones`)),
  tweet_dayfloor
)

# Run the tweet text through stm's textProcessor (defaults plus 'amp' as an
# extra stopword), carrying the tweet rows along as metadata, then through
# prepDocuments with its default settings.
gun_free_processed <- textProcessor(
  gun_free$tweet_text,
  metadata = gun_free,
  customstopwords = c('amp')
)
gun_free_out <- prepDocuments(
  gun_free_processed$documents,
  gun_free_processed$vocab,
  gun_free_processed$meta
)

# Document-term counts as a dense tibble with one column per vocabulary term.
gun_free_dtm <- convertCorpus(
  documents = gun_free_out$documents,
  vocab = gun_free_out$vocab,
  type = "Matrix"
) %>%
  as.matrix() %>%
  as_tibble()

# Sum each term's column, reshape to (word, count) rows, and keep the ten
# largest counts.
gun_free_summary <- slice(
  arrange(
    gather(summarise_all(gun_free_dtm, sum), key = "word", value = "count"),
    desc(count)
  ),
  1:10
)

#print(xtable(gun_free_summary[1:10,], digits = 0), include.rownames = F)

# One-row table for this frame: its label and the top terms, comma-separated.
gun_free_row <- tibble(
  Frame = "Gun Free Zones",
  top_words = str_c(gun_free_summary[["word"]], collapse = ', ')
)


################
## Gun control

# Tweets with a non-missing `Gun Control` value, ordered by tweet_dayfloor.
gun_control <- arrange(
  filter(master, !is.na(`Gun Control`)),
  tweet_dayfloor
)

# Export of this subset is currently disabled.
#write_csv(gun_control, "results-data/gun_control_tweets.csv")

# Run the tweet text through stm's textProcessor (defaults plus 'amp' as an
# extra stopword), carrying the tweet rows along as metadata, then through
# prepDocuments with its default settings.
gun_control_processed <- textProcessor(
  gun_control$tweet_text,
  metadata = gun_control,
  customstopwords = c('amp')
)
gun_control_out <- prepDocuments(
  gun_control_processed$documents,
  gun_control_processed$vocab,
  gun_control_processed$meta
)

# Document-term counts as a dense tibble with one column per vocabulary term.
gun_control_dtm <- convertCorpus(
  documents = gun_control_out$documents,
  vocab = gun_control_out$vocab,
  type = "Matrix"
) %>%
  as.matrix() %>%
  as_tibble()

# Sum each term's column, reshape to (word, count) rows, and keep the ten
# largest counts.
gun_control_summary <- slice(
  arrange(
    gather(summarise_all(gun_control_dtm, sum), key = "word", value = "count"),
    desc(count)
  ),
  1:10
)

#print(xtable(gun_control_summary[1:10,], digits = 0), include.rownames = F)

# One-row table for this frame: its label and the top terms, comma-separated.
gun_control_row <- tibble(
  Frame = "Gun Control",
  top_words = str_c(gun_control_summary[["word"]], collapse = ', ')
)


################
## enforcement failure

# Tweets with a non-missing `Enforcement Failure` value, ordered by
# tweet_dayfloor.
enforce_failure <- arrange(
  filter(master, !is.na(`Enforcement Failure`)),
  tweet_dayfloor
)

# Save the subset alongside the other results.
write_csv(enforce_failure, "results-data/enforce_failure_tweets.csv")

# Run the tweet text through stm's textProcessor (defaults plus 'amp' as an
# extra stopword), carrying the tweet rows along as metadata, then through
# prepDocuments with its default settings.
enforce_failure_processed <- textProcessor(
  enforce_failure$tweet_text,
  metadata = enforce_failure,
  customstopwords = c('amp')
)
enforce_failure_out <- prepDocuments(
  enforce_failure_processed$documents,
  enforce_failure_processed$vocab,
  enforce_failure_processed$meta
)

# Document-term counts as a dense tibble with one column per vocabulary term.
enforce_failure_dtm <- convertCorpus(
  documents = enforce_failure_out$documents,
  vocab = enforce_failure_out$vocab,
  type = "Matrix"
) %>%
  as.matrix() %>%
  as_tibble()

# Sum each term's column, reshape to (word, count) rows, and keep the ten
# largest counts.
enforce_failure_summary <- slice(
  arrange(
    gather(summarise_all(enforce_failure_dtm, sum), key = "word", value = "count"),
    desc(count)
  ),
  1:10
)

#print(xtable(enforce_failure_summary[1:10,], digits = 0), include.rownames = F)

# One-row table for this frame: its label and the top terms, comma-separated.
enforce_failure_row <- tibble(
  Frame = "Enforcement Failure",
  top_words = str_c(enforce_failure_summary[["word"]], collapse = ', ')
)


################
## background checks
# Tweets with a non-missing `Background Checks` value, ordered by
# tweet_dayfloor.
back_check <- arrange(
  filter(master, !is.na(`Background Checks`)),
  tweet_dayfloor
)

# Save the subset alongside the other results.
write_csv(back_check, "results-data/back_check_tweets.csv")

# Run the tweet text through stm's textProcessor (defaults plus 'amp' as an
# extra stopword), carrying the tweet rows along as metadata, then through
# prepDocuments with its default settings.
back_check_processed <- textProcessor(
  back_check$tweet_text,
  metadata = back_check,
  customstopwords = c('amp')
)
back_check_out <- prepDocuments(
  back_check_processed$documents,
  back_check_processed$vocab,
  back_check_processed$meta
)

# Document-term counts as a dense tibble with one column per vocabulary term.
back_check_dtm <- convertCorpus(
  documents = back_check_out$documents,
  vocab = back_check_out$vocab,
  type = "Matrix"
) %>%
  as.matrix() %>%
  as_tibble()

# Sum each term's column, reshape to (word, count) rows, and keep the ten
# largest counts.
back_check_summary <- slice(
  arrange(
    gather(summarise_all(back_check_dtm, sum), key = "word", value = "count"),
    desc(count)
  ),
  1:10
)

#print(xtable(back_check_summary[1:10,], digits = 0), include.rownames = F)

# One-row table for this frame: its label and the top terms, comma-separated.
back_check_row <- tibble(
  Frame = "Background Checks",
  top_words = str_c(back_check_summary[["word"]], collapse = ', ')
)


################
## assault weapons

# Tweets with a non-missing `Assault Weapons` value, ordered by tweet_dayfloor.
assault_weap <- arrange(
  filter(master, !is.na(`Assault Weapons`)),
  tweet_dayfloor
)

# Save the subset alongside the other results.
write_csv(assault_weap, "results-data/assault_weap_tweets.csv")

# Run the tweet text through stm's textProcessor (defaults plus 'amp' as an
# extra stopword), carrying the tweet rows along as metadata, then through
# prepDocuments with its default settings.
assault_weap_processed <- textProcessor(
  assault_weap$tweet_text,
  metadata = assault_weap,
  customstopwords = c('amp')
)
assault_weap_out <- prepDocuments(
  assault_weap_processed$documents,
  assault_weap_processed$vocab,
  assault_weap_processed$meta
)

# Document-term counts as a dense tibble with one column per vocabulary term.
assault_weap_dtm <- convertCorpus(
  documents = assault_weap_out$documents,
  vocab = assault_weap_out$vocab,
  type = "Matrix"
) %>%
  as.matrix() %>%
  as_tibble()

# Sum each term's column, reshape to (word, count) rows, and keep the ten
# largest counts.
assault_weap_summary <- slice(
  arrange(
    gather(summarise_all(assault_weap_dtm, sum), key = "word", value = "count"),
    desc(count)
  ),
  1:10
)

#print(xtable(assault_weap_summary[1:10,], digits = 0), include.rownames = F)

# One-row table for this frame: its label and the top terms, comma-separated.
assault_weap_row <- tibble(
  Frame = "Assault Weapons",
  top_words = str_c(assault_weap_summary[["word"]], collapse = ', ')
)

################
### Pulling all together
#(school security, political action, mental health,
 # gun free zones, gun control, enforcement failure, background checks, assault weapons)

# Stack the eight one-row frame tables into a single table with columns
# `Frame` and `top_words`, in the order the frames were processed above.
frame_words <- bind_rows(
  school_sec_row,
  polit_action_row,
  mental_health_row,
  gun_free_row,
  gun_control_row,
  enforce_failure_row,
  back_check_row,
  assault_weap_row
)

# Save the combined table next to the other results.
write_csv(frame_words, "results-data/top_frame_words.csv")

# Print the table via xtable without the row-number column. The literal FALSE
# is used rather than the shorthand `F`, which is an ordinary variable that can
# be reassigned.
print(xtable(frame_words, digits = 0), include.rownames = FALSE)
